External validation, recalibration, and clinical utility of the prognostic model kidney failure risk equation in patients with CKD stages G3-4: a nationwide retrospective cohort analysis in Peru
Main Analysis - Winsorization 1.5%: 3. Descriptive Analysis
Author
Percy Soto Becerra
1 Setup
# Setup: start from a clean workspace, load every package the analysis needs,
# source the project's KFRE helper scripts, and define the colour palette.

# NOTE(review): rm(list = ls()) only clears objects in the global environment;
# it does not reset loaded packages or options. Kept because the script has
# always started this way.
rm(list = ls())

# Install pacman if it is missing. requireNamespace() is used for the check so
# that a missing package is detected without relying on require()'s
# attach-as-side-effect; library() below does the actual attaching.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library(pacman)

# Unload all packages to begin in a session with only base packages
pacman::p_unload(all)

# Install (if needed) and load the packages used by the analysis.
# kableExtra was listed twice in the original call; once is enough.
pacman::p_load(
  here,
  rio,
  tidyverse,
  janitor,
  knitr,
  kableExtra,
  flextable,
  gtsummary,
  patchwork,
  cowplot,
  ggsci,
  scales,
  mice,
  sf,
  rnaturalearth,
  viridis,
  RColorBrewer,
  ggmagnify,
  survival,
  tidycmprsk,
  cmprsk,
  survminer
)

# Project helper scripts (paths are resolved from the project root by here()).
source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))

# Nine colours from the ggsci NPG ("nrc") palette; printed for inspection.
mypal <- pal_npg("nrc")(9)
mypal
# Import data
imp.datosA <- readRDS(
  here::here("Data", "Tidy", "Main-Winsorize-1_5", "data_impA.rds")
)

# Drop unused levels of the eGFR category factor.
# NOTE(review): the original comment here said "Create data for CKD 3b-4
# (subset of CKD3a-4)", but no subsetting happens in this step; only empty
# factor levels of grf_cat are removed.
imp.datosA <- imp.datosA |>
  mutate(grf_cat = droplevels(grf_cat))
3 Pre-processing data
# Pre-processing: derive the analysis variables on the stacked imputed data
# and attach human-readable variable labels.

# Number of rows in the original (non-imputed) data, i.e. rows with .imp == 0.
nsize <- nrow(imp.datosA |> filter(.imp == 0))

imp.datosA <- imp.datosA |>
  mutate(
    # Back-transform the log-scale laboratory variables.
    acr = exp(log_acr),
    urine_crea = exp(log_urine_crea),
    urine_album = exp(log_urine_album),

    # Albuminuria categories from the albumin-creatinine ratio:
    # A1 < 30, A2 30-300, A3 > 300; missing acr stays missing.
    acr_cat = case_when(
      acr < 30 ~ "A1",
      acr <= 300 & acr >= 30 ~ "A2",
      acr > 300 ~ "A3",
      TRUE ~ NA_character_
    ),

    # Risk class from the combination of eGFR category and albuminuria
    # category. Rows matching no rule (e.g. missing acr_cat) become NA.
    ckd_class = case_when(
      grf_cat %in% c("G1", "G2") & acr_cat == "A1" ~ "Low risk",
      (grf_cat %in% c("G3a") & acr_cat == "A1") |
        (grf_cat %in% c("G1", "G2") & acr_cat == "A2") ~
        "Moderately increased risk",
      (grf_cat %in% c("G3b") & acr_cat == "A1") |
        (grf_cat == "G3a" & acr_cat == "A2") |
        (grf_cat %in% c("G1", "G2") & acr_cat == "A3") ~
        "High risk",
      (grf_cat %in% c("G4", "G5") & acr_cat == "A1") |
        (grf_cat %in% c("G3b", "G4", "G5") & acr_cat == "A2") |
        (grf_cat %in% c("G3a", "G3b", "G4", "G5") & acr_cat == "A3") ~
        "Very high risk"
    ),
    ckd_class = factor(
      ckd_class,
      levels = c(
        "Low risk",
        "Moderately increased risk",
        "High risk",
        "Very high risk"
      )
    ),
    # cas = fct_rev(fct_infreq(cas)),
    # cas2 = fct_rev(fct_infreq(cas2)),

    # Flag rows by whether acr is available.
    acr_miss = if_else(
      !is.na(acr),
      "All variable with complete data",
      "At least 1 variable with missing data"
    ),

    # Recode the "1"/"0" indicators to "Yes"/"No"; anything else becomes NA.
    across(
      c(hta, dm, death2y, death5y),
      ~ case_when(
        .x == "1" ~ "Yes",
        .x == "0" ~ "No",
        TRUE ~ NA_character_
      )
    ),

    # Region of Peru for each network. This must be computed BEFORE `cas` is
    # relabelled below, because it matches on the raw `cas` values.
    # Fix: the Rebagliati network was matched here only as "Lima - Rebagliati",
    # while the `cas` recode below matched only "KAELIN" and
    # "Lima - Rebagliati: JB". Both recodes now accept all three raw labels so
    # Rebagliati rows cannot fall through to "Other" in either variable.
    region_peru = case_when(
      cas %in% c(
        "Lima - Sabogal",
        "Lima - Almenara",
        "Lima - Rebagliati",
        "KAELIN",
        "Lima - Rebagliati: JB"
      ) ~ "Lima Metropolitana",
      cas == "Amazonas" ~ "Amazonas",
      cas == "Ancash" ~ "Ancash",
      cas == "Apurimac" ~ "Apurímac",
      cas == "Arequipa" ~ "Arequipa",
      cas == "Ayacucho" ~ "Ayacucho",
      cas == "Cajamarca" ~ "Cajamarca",
      cas == "Cusco" ~ "Cusco",
      cas == "Huancavelica" ~ "Huancavelica",
      cas == "Huanuco" ~ "Huánuco",
      cas == "Huaraz" ~ "Ancash",
      cas == "Juliaca" ~ "Puno",
      cas == "Junin" ~ "Junín",
      cas == "Lambayeque" ~ "Lambayeque",
      cas == "La Libertad" ~ "La Libertad",
      cas == "Loreto" ~ "Loreto",
      cas == "Madre De Dios" ~ "Madre de Dios",
      cas == "Moquegua" ~ "Moquegua",
      cas == "Moyobamba" ~ "San Martín",
      cas == "Pasco" ~ "Pasco",
      cas == "Piura" ~ "Piura",
      cas == "Puno" ~ "Puno",
      cas == "Ica" ~ "Ica",
      cas == "Tacna" ~ "Tacna",
      cas == "Tarapoto" ~ "San Martín",
      cas == "Tumbes" ~ "Tumbes",
      cas == "Ucayali" ~ "Ucayali",
      TRUE ~ "Other" # Catch-all for any value not covered above
    ),

    # Display labels for the network; this overwrites the raw `cas` values.
    # NOTE(review): "Ancash: Anchas (except Huaraz)" looks like a typo for
    # "Ancash", and "Tarapoto" maps to plain "San Martín" while Moyobamba gets
    # "San Martín: Moyobamba". Both labels are left untouched in case later
    # code matches on them - confirm before changing.
    cas = case_when(
      cas %in% c(
        "KAELIN",
        "Lima - Rebagliati: JB",
        "Lima - Rebagliati"
      ) ~ "Lima: Rebagliati",
      cas == "Lima - Almenara" ~ "Lima: Almenara",
      cas == "Lima - Sabogal" ~ "Lima y Callao: Sabogal",
      cas == "Amazonas" ~ "Amazonas",
      cas == "Ancash" ~ "Ancash: Anchas (except Huaraz)",
      cas == "Apurimac" ~ "Apurímac",
      cas == "Arequipa" ~ "Arequipa",
      cas == "Ayacucho" ~ "Ayacucho",
      cas == "Cajamarca" ~ "Cajamarca",
      cas == "Cusco" ~ "Cusco",
      cas == "Huancavelica" ~ "Huancavelica",
      cas == "Huanuco" ~ "Huánuco",
      cas == "Huaraz" ~ "Ancash: Huaraz",
      cas == "Juliaca" ~ "Puno: Juliaca",
      cas == "Junin" ~ "Junín",
      cas == "Lambayeque" ~ "Lambayeque",
      cas == "La Libertad" ~ "La Libertad",
      cas == "Loreto" ~ "Loreto",
      cas == "Madre De Dios" ~ "Madre de Dios",
      cas == "Moquegua" ~ "Moquegua",
      cas == "Moyobamba" ~ "San Martín: Moyobamba",
      cas == "Pasco" ~ "Pasco",
      cas == "Piura" ~ "Piura",
      cas == "Puno" ~ "Puno: Puno (except Juliaca)",
      cas == "Ica" ~ "Ica",
      cas == "Tacna" ~ "Tacna",
      cas == "Tarapoto" ~ "San Martín",
      cas == "Tumbes" ~ "Tumbes",
      cas == "Ucayali" ~ "Ucayali",
      TRUE ~ "Other" # Catch-all for any value not covered above
    ),

    # Two-level grouping: Metropolitan Lima versus everything else.
    cas_cat = case_when(
      region_peru %in% c("Lima Metropolitana") ~ "Metropolitan Lima",
      TRUE ~ "Other Regions"
    ),

    # Predicted risks from the project helper kfre_pr() (sourced in setup).
    # NOTE(review): `imp.datosA` here is the object as it was BEFORE this
    # pipeline ran, so kfre_pr() does not see columns created above (e.g.
    # `acr`). Presumably it only needs columns already in the imported data,
    # and the second argument is presumably the horizon in years - confirm
    # both against kfre_pr.R.
    risk2y = kfre_pr(imp.datosA, 2),
    risk5y = kfre_pr(imp.datosA, 5)
  ) |>
  labelled::set_variable_labels(
    sex = "Sex",
    age = "Age (years)",
    cas = "EsSalud Network",
    cas_cat = "EsSalud Network",
    crea = "Serum Creatinine (mg/dL)",
    eGFR_ckdepi = "eGFR using CKD-EPI (ml/min/1.73m²)",
    acr = "Albumin-Creatinine Ratio (mg/g)",
    urine_album = "Urine Albumin (mg/dl)",
    urine_crea = "Urine Creatinine (mg/dL)",
    hta = "Hypertension",
    dm = "Diabetes Mellitus",
    grf_cat = "eGFR Categories",
    acr_cat = "Persistent Albuminuria Categories",
    ckd_class = "CKD KDIGO Classification",
    eventd5ylab = "5-Year Outcome",
    eventd2ylab = "2-Year Outcome",
    eventd5y = "5-Year Outcome",
    death5y = "5-Year Mortality",
    death2y = "2-Year Mortality"
  )
Cumulative incidence function curves for kidney failure (sky-blue line) and death before kidney failure (red line) in patients with (A) CKD stages 3a-3b-4 and (B) CKD stages 3b-4
# Lookup table describing how each model input variable is coded.
# Built from a named vector (variable name -> description) inside local() so
# that only `table_coding` is created in the calling environment.
table_coding <- local({
  coding_by_variable <- c(
    age = "integer number that indicates the age in completed years",
    male = "1 = male; 0 = female",
    eGFR_ckdepi = "estimated glomerular filtration rate obtained by CKD-EPI formula in $ml/min/1.73m^2$",
    acr = "albumin-to-creatinine ratio in mg/g"
  )

  data.frame(
    Variable = names(coding_by_variable),
    # unname() keeps the default 1..n row names instead of the variable names.
    Coding = unname(coding_by_variable)
  )
})
# Render the variable-coding table: build the kable with a caption and
# special-character escaping switched on, then apply kableExtra's default
# styling. Written as a nested call; equivalent to piping into kable_styling().
kable_styling(
  knitr::kable(
    table_coding,
    caption = "Table. Coding of variables",
    escape = TRUE
  )
)
Table. Coding of variables
Variable
Coding
age
integer number that indicates the age in completed years
male
1 = male; 0 = female
eGFR_ckdepi
estimated glomerular filtration rate obtained by CKD-EPI formula in $ml/min/1.73m^2$